Project 2

Extract 10000 tweets from Twitter using twitteR package including retweets.

Subset the retweets and the original tweets into a separate file

Plot the retweets and the original tweets as a vertical bar graph.

Include legends

Extract 10000 tweets from Twitter using twitteR package including retweets.

library(wordcloud)
## Loading required package: RColorBrewer
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(tm)
## Loading required package: NLP
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
## 
##     annotate
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(RColorBrewer)
library(ggplot2)
library(twitteR)
## 
## Attaching package: 'twitteR'
## The following objects are masked from 'package:dplyr':
## 
##     id, location

SET-UP CREDENTIALS

# Twitter API credentials.
#
# SECURITY: secrets must never be committed to source. The values that were
# previously hard-coded here have been exposed and should be revoked and
# regenerated in the Twitter developer portal.
#
# The credentials are read from environment variables (for example set in
# ~/.Renviron, which is not version-controlled):
#   TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET,
#   TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_SECRET
CONSUMER_SECRET <- Sys.getenv("TWITTER_CONSUMER_SECRET")
CONSUMER_KEY <- Sys.getenv("TWITTER_CONSUMER_KEY")
ACCESS_SECRET <- Sys.getenv("TWITTER_ACCESS_SECRET")
ACCESS_TOKEN <- Sys.getenv("TWITTER_ACCESS_TOKEN")

# Sys.getenv() returns "" for an unset variable; flag that early so a failed
# authentication later is easy to diagnose.
if (!all(nzchar(c(CONSUMER_SECRET, CONSUMER_KEY, ACCESS_SECRET, ACCESS_TOKEN)))) {
  warning(
    "One or more TWITTER_* environment variables are not set; ",
    "Twitter authentication will fail.",
    call. = FALSE
  )
}

CONNECT TO TWITTER APP

# Authenticate this R session against the Twitter API using the four
# credential values defined above. All arguments are passed by name, so
# their order does not matter.
setup_twitter_oauth(
  consumer_key = CONSUMER_KEY,
  consumer_secret = CONSUMER_SECRET,
  access_token = ACCESS_TOKEN,
  access_secret = ACCESS_SECRET
)
## [1] "Using direct authentication"

Get 10000 observations, including retweets.

# Search for up to 10000 English-language tweets matching "Philippines"
# within the given date window. Retweets are NOT excluded here; they are
# separated from original tweets later via the `isRetweet` column.
# `retryOnRateLimit` lets the call wait and retry (up to 120 times) when the
# API reports that the rate limit has been hit.
trendTweets <- searchTwitter(
  "Philippines",
  n = 10000,
  lang = "en",
  since = "2022-11-24",
  until = "2022-12-30",
  retryOnRateLimit = 120
)
## [1] "Rate limited .... blocking for a minute and retrying up to 119 times ..."
## [1] "Rate limited .... blocking for a minute and retrying up to 118 times ..."
## [1] "Rate limited .... blocking for a minute and retrying up to 117 times ..."
## [1] "Rate limited .... blocking for a minute and retrying up to 116 times ..."
## [1] "Rate limited .... blocking for a minute and retrying up to 115 times ..."
## [1] "Rate limited .... blocking for a minute and retrying up to 114 times ..."
## [1] "Rate limited .... blocking for a minute and retrying up to 113 times ..."

Convert the Twitter list to a data frame

# Convert the list of status objects returned by searchTwitter() into a
# data frame so it can be handled with dplyr / ggplot2.
philippinesDF <- twListToDF(twList = trendTweets)

Save and Load Data frame files

# Persist the raw data frame to disk, then read it back. Loading restores
# the object under its original name (`philippinesDF`), which allows later
# sessions to skip the slow, rate-limited Twitter search.
save(list = "philippinesDF", file = "philippinesDF.Rdata")
load("philippinesDF.Rdata")

Checking for missing values in data frame

# Count the missing values (NA) in every column of the data frame.
# vapply() is used instead of sapply() so the result is guaranteed to be a
# named integer vector (one count per column), even for a zero-column input.
# NOTE(review): the name `saple_data` looks like a typo for `sample_data`;
# it is kept unchanged in case it is referenced elsewhere.
saple_data <- vapply(
  philippinesDF,
  function(column) sum(is.na(column)),
  integer(1)
)

Tweets

Subsetting using the dplyr package.

# Keep only the original tweets (isRetweet == FALSE) together with the
# columns needed for the time-based plots.
philippines_tweets <- philippinesDF %>%
  select(screenName, text, created, isRetweet) %>%
  filter(isRetweet == FALSE)

# Save the subset and read it back. The file name passed to load() must be
# exactly the one passed to save(); previously load() pointed at
# "philippines_tweets.Rdata", which was never written, so it failed with
# "cannot open the connection".
save(philippines_tweets, file = "philippines_tweetsF.Rdata")
load(file = "philippines_tweetsF.Rdata")

Grouping the data created.

# Print the latest (max) and earliest (min) creation timestamps of the
# original tweets. Grouping by the constant 1 puts every row into a single
# group, so the summary has exactly one row.
summarise(
  group_by(philippines_tweets, 1),
  max = max(created),
  min = min(created)
)
## # A tibble: 1 × 3
##     `1` max                 min                
##   <dbl> <dttm>              <dttm>             
## 1     1 2022-12-11 12:55:53 2022-12-11 06:20:02
# Add a column holding each tweet's creation time rounded to the nearest
# hour. round() on a date-time does not return POSIXct, so the result is
# converted back with as.POSIXct() before being stored in the data frame.
philippines_data <- mutate(
  philippines_tweets,
  Created_At_Round = as.POSIXct(round(created, units = "hours"))
)

# Earliest and latest creation times among the original tweets.
mn <- min(philippines_tweets$created)
mx <- max(philippines_tweets$created)

Plot tweets using the library(plotly) and ggplot().

# Histogram of original tweets over time, with bars shaded by their height.
#  * after_stat(count) replaces the `..count..` notation, which was
#    deprecated in ggplot2 3.4.0.
#  * Created_At_Round is rounded to the hour, so one bin per hour
#    (binwidth = 3600 seconds on a date-time axis) is used instead of the
#    arbitrary default of 30 bins.
philData_plotting <- ggplot(philippines_data, aes(x = Created_At_Round)) +
  geom_histogram(aes(fill = after_stat(count)), binwidth = 3600) +
  theme(legend.position = "right") +
  xlab("Time") + ylab("Number of tweets") +
  scale_fill_gradient(low = "midnightblue", high = "aquamarine4")

# Render the static ggplot as an interactive plotly widget.
ggplotly(philData_plotting)

Retweets

# Keep only the retweets (isRetweet == TRUE), restricted to the columns
# needed for the time-based plot.
philippines_tweets2 <- philippinesDF %>%
  filter(isRetweet == TRUE) %>%
  select(screenName, text, created, isRetweet)

Grouping the data created

# Print the latest (max) and earliest (min) creation timestamps of the
# retweets. Grouping by the constant 1 puts every row into a single group,
# so the summary has exactly one row.
summarise(
  group_by(philippines_tweets2, 1),
  max = max(created),
  min = min(created)
)
## # A tibble: 1 × 3
##     `1` max                 min                
##   <dbl> <dttm>              <dttm>             
## 1     1 2022-12-11 12:55:54 2022-12-11 06:19:34
# Add a column holding each retweet's creation time rounded to the nearest
# hour. round() on a date-time does not return POSIXct, so the result is
# converted back with as.POSIXct() before being stored in the data frame.
philippines_data2 <- mutate(
  philippines_tweets2,
  Created_At_Round = as.POSIXct(round(created, units = "hours"))
)

# Earliest and latest creation times among the retweets.
# Note: this overwrites the `mn` / `mx` values computed for original tweets.
mn <- min(philippines_tweets2$created)
mx <- max(philippines_tweets2$created)

Plot retweets using the library(plotly) and ggplot().

# Histogram of retweets over time, with bars shaded by their height.
#  * after_stat(count) replaces the `..count..` notation, which was
#    deprecated in ggplot2 3.4.0.
#  * Created_At_Round is rounded to the hour, so one bin per hour
#    (binwidth = 3600 seconds on a date-time axis) is used instead of the
#    arbitrary default of 30 bins.
philData_plotting2 <- ggplot(philippines_data2, aes(x = Created_At_Round)) +
  geom_histogram(aes(fill = after_stat(count)), binwidth = 3600) +
  theme(legend.position = "right") +
  xlab("Time") + ylab("Number of tweets") +
  scale_fill_gradient(low = "midnightblue", high = "aquamarine4")

# Render the static ggplot as an interactive plotly widget.
ggplotly(philData_plotting2)